#!/usr/bin/env python3

import collections
import json
import pathlib
import subprocess
import sys

# Git ref that HEAD is compared against when no other ref is given.
DEFAULT_REF = 'origin/master'

#
# Paths whose linter problems get lighter treatment than the rest of the tree.
# Entries are matched with pathlib's relative_to(), so a directory entry also
# covers every file below it.
#
# NOTE(review): the stated intent was that problems in these files are
# blocking only when they are a part of the current pull request, but
# Diff.message_is_important() currently suppresses them unconditionally --
# confirm which behaviour is wanted.
#
THE_BIG_LIST_OF_NAUGHTY_FILES = [
    pathlib.Path(entry) for entry in (
        # for pylint
        'Documentation/conf.py',
        'LibOS/shim/test/ltp/contrib/conf_lint.py',
        'LibOS/shim/test/ltp/contrib/conf_merge.py',
        'LibOS/shim/test/ltp/contrib/conf_missing.py',
        'LibOS/shim/test/ltp/contrib/conf_remove_must_pass.py',
        'LibOS/shim/test/ltp/contrib/has_own_main.py',
        'LibOS/shim/test/ltp/contrib/report.py',
        'LibOS/shim/test/ltp/runltp_xml.py',
        'Examples/python-scipy-insecure/scripts/test-numpy.py',
        'Examples/python-scipy-insecure/scripts/test-scipy.py',
        'Examples/python-simple/scripts/benchrun.py',
        'Examples/python-simple/scripts/dummy-web-server.py',
        'Examples/python-simple/scripts/fibonacci.py',
        'Examples/python-simple/scripts/helloworld.py',
        'Examples/python-simple/scripts/test-http.py',
        'LibOS/shim/test/fs/test_fs.py',
        'LibOS/shim/test/regression/test_libos.py',
        'Pal/regression/test_pal.py',
        'Pal/src/host/Linux-SGX/sgx-driver/link-intel-driver.py',
        'Pal/src/host/Linux/pal-gdb.py',
        'Scripts/regression.py',
        'Tools',

        # for shellcheck
        'Examples/bash/scripts/bash_test.sh',
        'Examples/common_tools/benchmark-http.sh',
        'Examples/python-simple/run-tests.sh',
        'LibOS/shim/test/native',
        'Pal/src/host/Linux-SGX/debugger/gdb',
        'Scripts/list-all-graphene.sh',
        'Scripts/memusg',
        '.ci/run-pylint',
        '.ci/run-shellcheck',
    )
]

def _parse_hunk_range(spec):
    '''Decode one side of a hunk header into ``(start, length)``

    Args:
        spec (str): a range token such as ``-8,0`` or ``+9``; the leading sign
            is dropped and a missing length means exactly one line

    Returns:
        tuple: ``(start, length)`` as integers
    '''
    # the trailing 1 supplies the default length when the token has no comma
    start, length, *_ = *(int(i) for i in spec[1:].split(',')), 1
    return start, length

def _parse_diff_ranges(data):
    '''Collect the added/changed line ranges from unified diff text

    Args:
        data (str): output of ``git diff`` in unified format

    Returns:
        collections.defaultdict: filenames in keys and list of `(start, end)`
            ranges in values (start is inclusive, end is not, wrt
            :py:func:`range`)
    '''
    files = collections.defaultdict(list)
    path = None
    # body lines still expected for the old and new side of the current hunk
    old_left = new_left = 0

    # split on '\n' only: file content may contain other line separators
    for line in data.split('\n'):
        if old_left > 0 or new_left > 0:
            # Inside a hunk body. These lines must never be read as headers:
            # an added source line starting with '++ ' shows up as '+++ ...'.
            marker = line[:1]
            if marker in ('-', ' '):
                old_left -= 1
            if marker in ('+', ' '):
                new_left -= 1
            if marker in ('-', '+', ' ', '\\'):
                # '\' is the "\ No newline at end of file" note, not content
                continue
            # Not a body line after all: resynchronise and parse it as header.
            old_left = new_left = 0

        if line.startswith('+++ '):
            # drop everything up to the first slash (the 'b/' prefix)
            path = (None if line == '+++ /dev/null'
                else line.split('/', maxsplit=1)[-1])
            continue
        if line.startswith('@@ '):
            # @@ -8,0 +9 @@ [class name or previous line or whatever]
            _, minus, plus, *_ = line.split()
            _, old_left = _parse_hunk_range(minus)
            start, length = _parse_hunk_range(plus)
            new_left = length
            if path is None: # /dev/null
                continue
            if not length:
                # remove-only chunk
                continue
            files[path].append((start, start + length))

    return files

def get_diff_ranges(ref=DEFAULT_REF):
    '''Get chunks affected by a merge request

    Runs ``git diff -U0 <ref> HEAD`` and records, per file, which lines of the
    new version were added or changed.

    Args:
        ref (str): a reference to diff the HEAD against (default: origin/master)

    Returns:
        dict: a dict with filenames in keys and list of `(start, end)` ranges in
            values (start is inclusive, end is not, wrt :py:func:`range`); the
            concrete type is :py:class:`collections.defaultdict`, so indexing
            a missing filename inserts an empty list

    Raises:
        subprocess.CalledProcessError: if ``git diff`` exits with an error
    '''
    # errors='replace': changed files may hold bytes that are not valid UTF-8,
    # and only the header lines are interpreted, never the file content.
    data = subprocess.check_output(
        ['git', 'diff', '-U0', ref, 'HEAD']).decode(errors='replace')
    return _parse_diff_ranges(data)

class Diff:
    '''A quick and dirty diff evaluator

    Holds the changed-line ranges returned by :py:func:`get_diff_ranges` and
    answers whether a given ``(path, line)`` pair falls inside one of them.

    Usage sketch::

        diff = Diff()
        (message['file'], message['line']) in diff  # True or False
        diff.message_is_important(message)          # True or False

    The default diff is to the ``origin/master`` ref.
    '''
    # pylint: disable=too-few-public-methods

    def __init__(self, ref=DEFAULT_REF):
        '''Compute the diff of HEAD against *ref*

        Args:
            ref (str): a reference to diff the HEAD against
        '''
        self._files = get_diff_ranges(ref)

    def __contains__(self, pathline):
        '''Check whether a line was added or changed by the diff

        Args:
            pathline (tuple): a ``(path, line)`` pair

        Returns:
            bool: True if *line* lies within a changed range of *path*
        '''
        path, line = pathline
        # .get() instead of indexing: the mapping is a defaultdict, so indexing
        # an unknown path would never raise KeyError and would silently insert
        # an empty list on every miss.
        return any(start <= line < end
            for start, end in self._files.get(path, ()))

    @staticmethod
    def message_is_important(message):
        '''Decide whether a linter message should be reported

        A message is important unless its path is one of, or lies below one
        of, the entries in ``THE_BIG_LIST_OF_NAUGHTY_FILES``.

        NOTE(review): this check does not consult the diff ranges at all, so
        listed files are never reported, even when the pull request touches
        them -- confirm this is intended.

        Args:
            message (dict): a linter message; only ``message['path']`` is read

        Returns:
            bool: False for paths covered by the list, True otherwise
        '''
        path = pathlib.Path(message['path'])
        for naughty in THE_BIG_LIST_OF_NAUGHTY_FILES:
            try:
                path.relative_to(naughty)
            except ValueError:
                # path is not at or below this entry; try the next one
                continue
            # on the list: don't complain
            return False

        # not on the list: always complain
        return True

def main():
    '''Filter linter messages read from standard input

    Reads a JSON list of messages from stdin, prints the ones that
    :py:meth:`Diff.message_is_important` accepts and counts them.

    Returns:
        int: number of printed messages, capped at 255 so that it can be used
            as a process exit status
    '''
    diff = Diff()
    with sys.stdin:
        entries = json.load(sys.stdin)

    reported = 0
    for entry in entries:
        # shellcheck: fill in the keys that the output format below expects
        if 'path' not in entry:
            entry['path'] = entry['file']
        if 'symbol' not in entry:
            entry['symbol'] = entry['code']

        if not diff.message_is_important(entry):
            continue

        if reported == 0:
            # print the heading once, just before the first reported message
            print('MESSAGES AFFECTING THIS PR:')
        print('{path} +{line}:{column}: {symbol}: {message}'.format(**entry))
        reported += 1

    return min(reported, 255)

if __name__ == '__main__':
    sys.exit(main())
